PC Media 22

home *** CD-ROM | disk | FTP | other *** search

/ PC Media 22 / PC MEDIA CD22.iso / share / prog / datalib2 / index.cpp < prev next >

Wrap

C/C++ Source or Header | 1995-08-14 | 16.3 KB | 540 lines

#include "datapriv.hpp" /* This file contains the routines concerned with the generation and use of index files */ /*************************************************************************** Verify Index The following routine verifys an index by opening a record on that index and checking that all records are present and in order ***************************************************************************/ long clusnrec; // Total number of records found in clusters int database::verifyindex(char *ind,int depth,void (*progress)(int,long),int order) { if (!getindkey(ind)) return INVIND; // Must be valid attached index record rec(*this,ind); char res[128],resn[128]; int rtype; char iexp[128]; strcpy(iexp,getindkey(ind)); rec.select(FIRST,ALL); if (!depth) return 0; // Write all changed clusters to the index file indclus *ip=rec.curind->icp->oi->topind; while(ip) { if (ip->change && ip->valid) { ip->change=0; Fseek(ip->fp,ip->clusn*512L,SEEK_SET); Fwrite(ip->clusdat,1,512,ip->fp); } ip=ip->next; } if (progress) (*progress)(1,0); clusnrec=0; if (!rec.curind->icp->oi->topind->verclus(progress,order)) return ERRINTREE; if (clusnrec!=getnrec()) return INCOMP; if (depth<2) return 0; rec.eval(iexp,res,rtype); for(long i=1; i<getnrec(); i++) // Depth=2, check out record order { if (progress) { if (i==1) (*progress)(2,i); if (!(i%order)) (*progress)(2,i); } if (rec.select(NEXT,ALL)) return INCOMP; rec.eval(resn,rtype); double comp; switch(rtype) { case OPINT : comp=*(double *)resn-*(double *)res; break; case OPDATE: comp=atol(resn)-atol(res); case OPSTR : comp=strcmp(resn,res); } if (comp<0) return OORD; memcpy(res,resn,100); } if (depth<3) return 0; // Depth 3, check all records present in index record rp(*this); rp.select(FIRST); record frp(*this,ind); for(i=1; i<=getnrec(); i++) { if (progress) { if (i==1) (*progress)(3,i); if (!(i%order)) (*progress)(3,i); } rp.eval(getindkey(ind),res,rtype); if (frp.selkey(res,READIND,rtype)) return RECNOTFND; while(rp.getrecnum()!=frp.getrecnum()) frp.selkey(); rp.select(NEXT,ALL); } return 0; } // Verify a cluster tree, return last key if OK, return 0 on error // If record cluster then add number of records to clusnrec char *indclus::verclus(void (*progress)(int,long),int order) { long nrec; // Points to each record in index char *lrec,*nkey; indclus *ni; int i,created; if (clustyp) // Block cluster { for(i=0; i<nclusrec; i++) // Check each cluster in the block { int err=0; nrec=*(long *)(clusdat+4+i*reclen); // Next block down nkey=clusdat+12+i*reclen; // Last key of next block down ni=new indclus(nrec,0,oi); // Next index down lrec=ni->verclus(progress,order); if (!lrec) err=1; // Error in next block down else switch(oi->restype) // Check last key in child matches this { case OPSTR : if (strncmp(nkey,lrec,explen)) err=1; break; case OPINT : if (*(double *)nkey-*(double *)lrec) err=1; break; case OPDATE : if (strncmp(nkey,lrec,8)) err=1; break; } delete ni; if (err) return 0; } } else // Record cluster { long oclus=clusnrec; clusnrec+=nclusrec; if (progress && (oclus/order!=clusnrec/order)) (*progress)(1,clusnrec); } char *rv=(clusdat+12+reclen*(nclusrec-1)); return rv; } /*************************************************************************** BUILD INDEX The following routines and structures are concerned with building an index from scratch. A flat file of unsorted record clusters is built, these are then sorted, and then the tree is built at the end of the file finally the header record is written ***************************************************************************/ // The following holds a disk buffer, there are DBUF of these. #define DBUF 5 // Number of disk clusters buffered in indexes struct _dbuf { char buf[512]; // Holds the data from disk char uflag; // Counter increments once for every use long dpos; // Position in disk file int cval; // contents have changed if cval==1 }; _dbuf *_dbp; // Points to disk buffer array /************** Build Index ... This routine builds an index ************/ // Returns 0 on success, error otherwise int database::buildindex(char *expr,char *fname,void (*progress)(int,long),int order) { record rec(*this,NOIND); // record int indlen,indtype; // Index length and type int tindlen; // total index length with pad char ws[128],*wsp; // Work space, generic pointer FILE *ifp; // Index file pointer int ipad; // No. of characters to pad out index result int nclusrec; // No. of records/cluster char buf[512]; // Useful buffer long i,j,k; int dum; long topclus=1; // Cluster at top of tree int rv=0; if (nrec) rv=rec.select(FIRST,ALL); if (!(indlen=rec.indchk(expr,indtype)) || rv) return EXPRERR; ipad=(4-indlen%4)%4; tindlen=indlen+ipad; nclusrec=504/(indlen+ipad+8); strcpy(ws,fname); if (wsp=strchr(ws,'.')) *wsp=0; strcat(ws,".NDX"); if (!(ifp=fopen(ws,"w+b"))) return NOINDEX; Fwrite(buf,512,1,ifp); // Write a blank header (fill it in later) rec.eval(expr,ws,dum); // Tokenise expression if (nrec) { long nwrit=0; while(!rv) // Write out all records & index expressions { long nwr=0; // No. of records written int dum; char *ip=buf+4; // Pointer to next free space in buffer for(i=0; i<nclusrec; i++) { if (rv) break; if (!rec.eval(ws,dum)) {fclose(ifp); return EXPRERR;} *(long *)ip=0; ip+=4; *(long *)ip=rec.getrecnum(); ip+=4; // rec. no. switch(indtype) // Result { case OPINT : *(double *)ip=*(double *)ws; ip+=sizeof(double); break; case OPSTR : wsp=ws; while(*wsp++); wsp--; k=wsp-ws; for(j=0; j<tindlen-k; j++) *wsp++=' '; strncpy(ip,ws,tindlen); ip+=tindlen; break; case OPDATE : strncpy(ip,ws,8); ip+=tindlen; break; } nwr++; nwrit++; if (progress && (nwrit==1 || !(nwrit%order))) (*progress)(1,nwrit); rv=rec.select(NEXT,ALL); } *(double *)ip=0; *(long *)buf=(long)nwr; Fwrite(buf,512,1,ifp); } _dbp=new _dbuf[DBUF]; indsort(ifp,indtype,tindlen+8,indlen,nrec,nclusrec,progress,order); delete _dbp; if (progress) (*progress)(3,1); buildtree(ifp,tindlen+8,indlen,nclusrec,nrec/nclusrec+1,topclus); } Fseek(ifp,0L,SEEK_SET); // Now write header memset(buf,0,512); *(long *)buf=topclus; *(long *)(buf+4)=topclus+1; switch(indtype) { case OPINT : buf[9]='N'; buf[16]=1; break; case OPDATE: buf[9]='D'; buf[16]=0; break; case OPSTR : buf[9]='C'; break; } buf[12]=indlen; buf[14]=nclusrec; buf[18]=indlen+ipad+8; strcpy(buf+24,expr); Fwrite(buf,512,1,ifp); if (!nrec) {memset(buf,0,512); Fwrite(buf,512,1,ifp);} fclose(ifp); return 0; } /********** Index sort ******************************************************/ // These functions implement a full index sort on the unsorted index // file. // The following structure is a buffer which holds a cluster from // the index file, whenever 2 elements are compared or sorted, the // buffers are checked first to see if the elements are contained therein struct _ibf { char *buf; // Points to character data _dbuf *dbuf; // Points to disk buffer associated with index long low; // Lowest element number in buffer long up; // Highest element number in buffer long n; // Element number in buffer to examine char *an; // Address of element number in buffer to examine char *tn; // Points to element data long dpos; // Position in disk file FILE *fp; // File pointer to index file int nclusrec; // No. of records/cluster int indlen; // index length int indexplen; // index expression length int indtype; // Index type _ibf(FILE *,int,int,int,int); // Constructor initialises buffer ~_ibf(); // Destructor flushes buffer to disk void setn(long n); // Set element no. of buffer to n // This will check if n is in the buffer // and if it is not will save the buffer // and update the buffer with new value void indswap(struct _ibf &s); // Swap element with s int indcomp(struct _ibf &s); // Compare element with s }; _ibf::_ibf(FILE *ifp,int inclusrec,int iindlen,int iindexplen, int iindtype) { fp=ifp; nclusrec=inclusrec; indlen=iindlen; indexplen=iindexplen; indtype=iindtype; up=-1; // Flag dbuf is invalid dbuf=0; } _ibf::~_ibf() { dbuf->uflag--; if (!(dbuf->uflag) && dbuf->cval) { Fseek(fp,512L*dpos,SEEK_SET); Fwrite(buf,512,1,fp); } } // Set element number in buffer // If we are simply changing to another element in the same buffer then // just move the pointers. If we are moving buffers then check if the // current buffer should be written back to disk, see if anyone else // is using the new buffer & if so grab it, if not then set up a new // buffer holding the cluster we require from disk. void _ibf::setn(long nw) { n=nw; if (nw<low || nw>up) // Update buffer ? { if (dbuf) // Only if dbuf is valid { dbuf->uflag--; if (dbuf->cval && !dbuf->uflag) { Fseek(fp,512L*dpos,SEEK_SET); Fwrite(buf,512,1,fp); } } dpos=n/nclusrec+1; int fflag=0; // Flags buffer already exists _dbuf *nfree; // Points to a free buffer for(int i=0; i<DBUF; i++) // Now do we already have this buffer ? { if (_dbp[i].dpos==dpos) {fflag=1; break;} if (!(_dbp[i].uflag)) nfree=&(_dbp[i]); } if (fflag) // Someone else has grabbed this buffer for us { dbuf=&(_dbp[i]); buf=dbuf->buf; dbuf->uflag++; } else // We'll grab a new buffer { nfree->uflag=1; nfree->dpos=dpos; nfree->cval=0; buf=nfree->buf; Fseek(fp,512L*dpos,SEEK_SET); Fread(buf,512,1,fp); dbuf=nfree; } low=(dpos-1)*nclusrec; up=dpos*nclusrec-1; } an=buf+4+(n-low)*indlen; tn=an+8; } // This function swaps the current element with the current element of s void _ibf::indswap(_ibf &s) { char temp[110]; memcpy(temp,an,indlen); memcpy(an,s.an,indlen); memcpy(s.an,temp,indlen); dbuf->cval=1; s.dbuf->cval=1; // Show contents have changed } // This function compares the current element with the current element of s int _ibf::indcomp(_ibf &s) { // if (indtype==OPSTR) return(strncmp(tn,s.tn,indexplen)); if (indtype!=OPINT) return(strncmp(tn,s.tn,indexplen)); double dr=(*(double *)(tn))-(*(double *)(s.tn)); if (dr>0) return 1; if (dr<0) return -1; return 0; } // The following function takes a pointer to an index file, and // sorts the elements in it, it is a quicksort derived from the Zortech // qsort function itself derived from the Ray Gardner public domain function // indtype is the type of result, indlen the length of index+pad+pointers, // indexplen is the length of index expression alone, nel is the no. of // elements, and nclusrec the number of records/cluster void database::indsort(FILE *ifp,int indtype,int indlen,int indexplen, long nel,int nclusrec,void (*progress)(int,long),int order) { long nswap=0; long lastorder=-1; long stack[40],*sp; // stack and stack pointer _ibf i(ifp,nclusrec,indlen,indexplen,indtype); // scan and limit pointers _ibf j(ifp,nclusrec,indlen,indexplen,indtype); _ibf limit(ifp,nclusrec,indlen,indexplen,indtype); _ibf base(ifp,nclusrec,indlen,indexplen,indtype); _ibf temp(ifp,nclusrec,indlen,indexplen,indtype); for(int x=0; x<DBUF; x++) {_dbp[x].dpos=-1; _dbp[x].uflag=0;} // Init buffers sp=stack; // Init stack pointer base.setn(0); limit.setn(nel); /* pointer past end of array */ while (1) /* repeat until done then return */ { if (nswap>lastorder && progress) {(*progress)(2,nswap); lastorder=nswap+order;} while (limit.n-base.n>5) // Max span=5, otherwise insertion sort { temp.setn(((unsigned long)(limit.n-base.n)>>1)+base.n); // swap middle, base temp.indswap(base); nswap++; i.setn(base.n+1); /* i scans from left to right */ j.setn(limit.n-1); /* j scans from right to left */ if (i.indcomp(j)>0) {i.indswap(j); nswap++;} // Sedgewick's // three-element sort if (base.indcomp(j)>0) {base.indswap(j); nswap++;} // sets things up // so that if (i.indcomp(base)>0) {i.indswap(base); nswap++;} // i <= base <= j // base is the pivot element while (1) { do i.setn(i.n+1); /* move i right until i >= pivot */ while (i.indcomp(base)<0); do j.setn(j.n-1); /* move j left until j <= pivot */ while (j.indcomp(base)>0); if (i.n>j.n) break; /* break loop if pointers crossed */ i.indswap(j); /* else swap elements, keep scanning */ nswap++; } nswap++; base.indswap(j); /* move pivot into correct place */ if (j.n-base.n>limit.n-i.n) /* if left subfile is larger... */ { sp[0]=base.n; sp[1]=j.n; /* stack left subfile base */ base.setn(i.n); /* and limit */ } /* sort the right subfile */ else /* else right subfile is larger */ { sp[0]=i.n; sp[1]=limit.n; /* stack right subfile base */ limit.setn(j.n); /* and limit */ } /* sort the left subfile */ sp+=2; /* increment stack pointer */ } // Insertion sort on remaining subfile i.setn(base.n+1); while (i.n<limit.n) { j.setn(i.n); temp.setn(j.n-1); while (j.n>base.n && temp.indcomp(j)>0) { temp.indswap(j); nswap++; j.setn(j.n-1); temp.setn(j.n-1); } i.setn(i.n+1); } if (sp>stack) /* if any entries on stack... */ { sp-=2; /* pop the base and limit */ base.setn(sp[0]); limit.setn(sp[1]); } else break; /* else stack empty, all done */ } } // This routine builds an index tree in a sorted flat index file // indlen is length of index + pointers // nrec is no. of records // nclusrec is no. of records/cluster // nclus is no. of clusters at the level of tree being indexed // indexplen is the index expression length // topclus is returned as the top cluster in the tree void database::buildtree(FILE *fp,int indlen,int indexplen, int nclusrec,long nclus,long &topclus) { long fclus=1; // First cluster of this level of tree long flclus; // First cluster of level being written char buf[512],ibuf[512]; // Holds current cluster & cluster from disk int recclus=1; // Flags we are looking at a record cluster if (nclus==1) flclus=1; while(nclus>1) { long tlclus=0; // Totals clusters written at this level long nclev=nclus/nclusrec; // No. of clusters to write at this level Fseek(fp,0L,SEEK_END); flclus=ftell(fp)/512L; // First cluster at this level for(long i=0; i<=nclev; i++) // For each cluster at this level { Fseek(fp,fclus*512L,SEEK_SET); // Find first cluster to be examined char *cop=buf+4; // Location to copy record to int nrecclus=0; // No. of records in cluster for(int j=0; j<nclusrec; j++) { char *lr; // points to last record in cluster if (!(nclus--)) break; // Last cluster yet ? Fread(ibuf,512,1,fp); // Cluster to be examined lr=ibuf+4+(*(long *)ibuf-recclus)*indlen; // Last record in cluster *(long *)cop=fclus++; cop+=sizeof(long); // Record number *(long *)cop=0; cop+=sizeof(long); memcpy(cop,lr+8,indexplen); // Copy in last record cop+=indlen-8; nrecclus++; } tlclus++; *(long *)buf=nrecclus-1; // No. of records in cluster Fseek(fp,0L,SEEK_END); Fwrite(buf,512,1,fp); // move to file end & write } fclus=flclus; nclus=tlclus; // Now index level just done if (recclus) recclus=0; // Now looking at blocks } topclus=flclus; // Top cluster in tree }